package org.cagiobox.contactfinder;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;

/**
 * Extracts the first e-mail address referenced by a {@code mailto} link in a
 * chunk of HTML.
 *
 * <p>
 * The extraction is a plain text scan: it looks for the first occurrence of
 * the text {@code mailto}, skips the single separator character that follows
 * it (the {@code ':'} of {@code mailto:}) and returns everything up to the
 * next double quote, e.g. {@code <a href="mailto:info@example.com">} yields
 * {@code info@example.com}.
 *
 * <p>
 * Instances hold no state.
 */
public class Extractor {

	static Logger log = Logger.getLogger(Extractor.class.getName());

	/** Text that introduces an e-mail link inside the HTML. */
	private static final String MAILTO_MARKER = "mailto";

	/** Character that closes the attribute value containing the link. */
	private static final char CLOSING_QUOTE = '"';

	/**
	 * Creates an extractor. No configuration is required.
	 */
	public Extractor() {
		// Nothing to initialise: the extractor keeps no per-instance state.
	}

	/**
	 * Returns the address of the first {@code mailto} link found in
	 * {@code html}.
	 *
	 * @param html
	 *            the HTML text to scan; may be {@code null}
	 * @return the text between the separator following {@code mailto} and the
	 *         next double quote, or an empty string when {@code html} is
	 *         {@code null}, contains no {@code mailto}, or the first
	 *         {@code mailto} is not followed by a separator and a closing
	 *         double quote
	 */
	public String extractEmail(String html) {

		if (html == null) {
			return "";
		}

		int markerStart = html.indexOf(MAILTO_MARKER);
		if (markerStart < 0) {
			return "";
		}

		int markerEnd = markerStart + MAILTO_MARKER.length();
		// Skip the one separator character after the marker (':' in "mailto:").
		int addressStart = markerEnd + 1;
		int addressEnd = html.indexOf(CLOSING_QUOTE, markerEnd);

		// addressEnd is -1 when there is no closing quote, and equals markerEnd
		// when the quote directly follows the marker; in both cases there is no
		// address to cut out, and calling substring would throw.
		if (addressEnd < addressStart) {
			return "";
		}

		return html.substring(addressStart, addressEnd);
	}

}
